% Find SELECT for changes to sample, specification, solver, etc

clc;
clear;
format long

% Import data file, SELECT sample, and label variables:
data=importdata('Estimation_data_all.txt');                 % N=2682: all completed main-draw matches with non-missing environmental conditions

% SELECT ability_metric 1=pre-match odds, 2.5=granular rank, or 4=z-scores
ability_metric=1;
% SELECT prize spread 1=ones, 2=cash prize spread, 3=points prize spread
prize_metric=2;
% SELECT finite difference for covariance
h=1e-02;


if ability_metric==1
    dvBet=data(:,10);                              % 1. pre-match odds are available for both players
    data_sample=data(dvBet==1,:);                  % N=2157: all among the 2682 matches for which pre-match odds are available (both players)
elseif ability_metric==2.5
    dvRank=data(:,15);                             % 2.5. (granular) WTA ranks are available for both players
    data_sample=data(dvRank==1,:);                 % N=2670: all among the 2682 matches for which WTA ranks are available (both players)
elseif ability_metric==4
    dvPts=data(:,18);                              % 4. normalized ranking points (z-scores) are available for both players
    data_sample=data(dvPts==1,:);                  % N=2670: all among the 2682 matches for which WTA ranking points are available (both players)--this happens to be the same sample as in 2
end

clear data dvBet dvRank dvPts;

N=size(data_sample,1);

w1=data_sample(:,1);           % set 1 games won by match winner
l1=data_sample(:,2);           % set 1 games won by match loser
w2=data_sample(:,3);           % set 2 games won by match winner
l2=data_sample(:,4);           % set 2 games won by match loser
w3=data_sample(:,5);           % set 3 games won by match winner (if applicable)
l3=data_sample(:,6);           % set 3 games won by match loser (if applicable)
%wSets=data_sample(:,7);        % number of sets won by match winner
%lSets=data_sample(:,8);        % number of sets won by match loser
%tSets=data_sample(:,9);        % total number of sets
wWinProb=data_sample(:,11);    % (if pre-match odds are available) ex-ante winning probability of match winner
lWinProb=data_sample(:,12);    % (if pre-match odds are available) ex-ante winning probability of match loser
winProbDiff=data_sample(:,13); % abs(wWinProb-lWinProb)
wRank=data_sample(:,16);       % (if both players are ranked) WTA rank of match winner
lRank=data_sample(:,17);       % (if both players are ranked) WTA rank of match loser
%wPts=data_sample(:,19);        % (if both players have points) WTA ranking points of match winner
%lPts=data_sample(:,20);        % (if both players have points) WTA ranking points of match loser
wPtsN=data_sample(:,21);       % (if both players have points) WTA z-score of match winner
lPtsN=data_sample(:,22);       % (if both players have points) WTA z-score of match loser
%wAge=data_sample(:,24);        % (if age is available) age of match winner
%lAge=data_sample(:,25);        % (if age is available) age of match loser
%ageDiff=data_sample(:,26);     % (if age is available) abs(wAge-lAge)
prizeCashK=data_sample(:,27);  % win-lose cash prize spread of match (in 1000 USD)
prizePts=data_sample(:,28);    % win-lose WTA ranking points prize spread of match  
tpORtp3h=data_sample(:,29);    % temperature during match
Cpm25=data_sample(:,30);       % pollution during match
raining=data_sample(:,32);	   % (if record of rain is available) dummy for any rain during match
%dvAussieO=data_sample(:,33);   % Australian Open match
%dvChinaO=data_sample(:,34);    % China Open match
%dvWuhanO=data_sample(:,35);    % Wuhan Open match
%dvOtherC=data_sample(:,36);    % Other Chinese WTA series match (other than in Beijing and in Wuhan)
round=data_sample(:,37);       % round of WTA series

clear data_sample;
       

% We want to label players according to low (marginal) cost (of effort) "l" and high cost "h" (not winner "w" and loser "l")
% Cost will be a function of ability, so index match winner according to whether she was the low-cost (equivalently, more-able) player

if ability_metric==1
    LowCost_WinsMatch=(wWinProb>=lWinProb);                                                             % (1596 out of 2157 matches = 74%) 1. Ability proxy is pre-match winning probability (betting odds)
    LowCost_Ability =(.01*wWinProb).*LowCost_WinsMatch + (.01*lWinProb).*(ones(N,1)-LowCost_WinsMatch); % max(wWinProb,lWinProb) and min(wWinProb,lWinProb), respectively, would work just as well
    HighCost_Ability=(.01*lWinProb).*LowCost_WinsMatch + (.01*wWinProb).*(ones(N,1)-LowCost_WinsMatch); % (used only for observations where pre-match odds are available for both players)
elseif ability_metric==2.5
    LowCost_WinsMatch=(wRank<=lRank);                                                                   % (1871 out of 2670 matches = 70%) 2.5. (granular) Ability proxy is WTA rank
    LowCost_Ability =wRank.*LowCost_WinsMatch    + lRank.*(ones(N,1)-LowCost_WinsMatch);                % min(wRank,lRank) and max(wRank,lRank), respectively, would work just as well
    HighCost_Ability=lRank.*LowCost_WinsMatch    + wRank.*(ones(N,1)-LowCost_WinsMatch);                % (used only for observations where WTA ranks are available for both players)
elseif ability_metric==4
    LowCost_WinsMatch=(wPtsN>=lPtsN);                                                                   % (1870 out of 2670 matches = 70%) 4. Ability proxy is WTA z-score (normalized ranking points)
    LowCost_Ability =wPtsN.*LowCost_WinsMatch    + lPtsN.*(ones(N,1)-LowCost_WinsMatch);                % max(wPtsN,lPtsN) and min(wPtsN,lPtsN), respectively, would work just as well
    HighCost_Ability=lPtsN.*LowCost_WinsMatch    + wPtsN.*(ones(N,1)-LowCost_WinsMatch);                % (used only for observations where WTA ranking points are available for both players)   
end


% Prepare indicators for match outcomes
LowCost_Set1Games =w1.*(LowCost_WinsMatch==1)+l1.*(LowCost_WinsMatch==0);
HighCost_Set1Games=l1.*(LowCost_WinsMatch==1)+w1.*(LowCost_WinsMatch==0);
LowCost_Set2Games =w2.*(LowCost_WinsMatch==1)+l2.*(LowCost_WinsMatch==0);
HighCost_Set2Games=l2.*(LowCost_WinsMatch==1)+w2.*(LowCost_WinsMatch==0);
LowCost_Set3Games =w3.*(LowCost_WinsMatch==1)+l3.*(LowCost_WinsMatch==0);
HighCost_Set3Games=l3.*(LowCost_WinsMatch==1)+w3.*(LowCost_WinsMatch==0);
outcome_ll= ( (LowCost_Set1Games>HighCost_Set1Games) & (LowCost_Set2Games>HighCost_Set2Games) );
outcome_lhl=( (LowCost_Set1Games>HighCost_Set1Games) & (LowCost_Set2Games<HighCost_Set2Games) & (LowCost_Set3Games>HighCost_Set3Games) );
outcome_lhh=( (LowCost_Set1Games>HighCost_Set1Games) & (LowCost_Set2Games<HighCost_Set2Games) & (LowCost_Set3Games<HighCost_Set3Games) );
outcome_hll=( (LowCost_Set1Games<HighCost_Set1Games) & (LowCost_Set2Games>HighCost_Set2Games) & (LowCost_Set3Games>HighCost_Set3Games) );
outcome_hlh=( (LowCost_Set1Games<HighCost_Set1Games) & (LowCost_Set2Games>HighCost_Set2Games) & (LowCost_Set3Games<HighCost_Set3Games) );
outcome_hh= ( (LowCost_Set1Games<HighCost_Set1Games) & (LowCost_Set2Games<HighCost_Set2Games) );
outcome=[outcome_ll outcome_lhl outcome_lhh outcome_hll outcome_hlh outcome_hh];
clear LowCost_Set1Games HighCost_Set1Games LowCost_Set2Games HighCost_Set2Games LowCost_Set3Games HighCost_Set3Games;
clear outcome_ll outcome_lhl outcome_lhh outcome_hll outcome_hlh outcome_hh;


% w/l in original variable names is confusing, since l denotes loser, not low-cost player; thus cleanse the variables that will not be used further
clear wRank lRank wPts lPts wPtsN lPtsN l1 l2 l3 w1 w2 w3;
clear wAge lAge ageDiff;

if prize_metric==1
    V_l=ones(N,1);
    V_h=V_l;
elseif prize_metric==2
    V_l=prizeCashK;
    V_h=V_l;
elseif prize_metric==3
    V_l=prizePts;
    V_h=V_l;
end
clear prizeCash prizePts;

% Parameters that are fixed
cutoff_tp=27;                           % Coefficient can apply on bin or linearly in the difference or log difference beyond threshold
cutoff_pm=150/100;
delta_0=0;

% Parameters that are estimated: Initial values over which search takes place.
k_ini       =1;
if ability_metric==1
    theta_ini=0;            % 1. Ability proxy is pre-match winning probability (betting odds)
elseif ability_metric==2.5
    theta_ini=0;            % 2.5. Ability proxy is WTA rank (granular)
elseif ability_metric==4
    theta_ini=0;            % 4. Ability proxy is WTA z-score (normalized ranking points)
end
delta_tp_ini=0; 
delta_pm_ini=0;
sigma_epsilon_ini=0;

parameter_fixed=[ cutoff_tp;            % Temperature (tpORtp3h) cutoff
                  cutoff_pm;            % PM2.5 (Cpm25) cutoff
                  delta_0 ];            % Delta intercept
parameter_ini=[ k_ini;                  % technology parameter (randomness with which cost differences translate into winning probabilities)
                theta_ini;              % cost parameter
                delta_tp_ini;           % temperature disutility parameter (variation in sample is 0 = 25 C to 19 = 44 C)
                delta_pm_ini;           % PM2.5 disutility parameter (variation in sample is 0 = 100 ug/m3 to 4.2 = 520 ug/m3)
                sigma_epsilon_ini ];    % pins down the variance of the additive cost error      
clear k_ini theta_ini delta_tp_ini delta_pm_ini sigma_epsilon_ini;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 1000 simulations
% S=1000;
S=100;
    % to store results:
    sim_minError =repmat(0,1,S);
    %sim_fvalIni  =repmat(0,1,S);
    sim_parameter=repmat(0,size(parameter_ini,1),S);
    sim_fval     =repmat(0,1,S);
    sim_exitflag  =repmat(0,1,S);
    
rng(55555);
for s=1:S
    s
    LowCost_Error =normrnd(0,1,N,1);	% standard normal, N(0,1^2) (this becomes 'data': the number of sigmas a player is from her mean, on waking up that day)
    HighCost_Error=normrnd(0,1,N,1);
    sim_minError(s)=min([LowCost_Error; HighCost_Error]);    

    %display('The mean (negative of the) log likelihood contribution across matches at the initial parameter values is ')
    %sim_fvalIni(s)= likelihood_obj(parameter_ini,ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed,LowCost_Error,HighCost_Error)
    
    % Optimization
    options = optimset('algorithm','interior-point','display','iter','maxiter',100000,'maxfunevals',100000,'TolX',1e-20,'TolFun',1e-20,'TolCon',1e-6,'Hessian','bfgs');
    A=[]; b=[];
    lb=[0; 0;   -Inf; -Inf; 0];
    ub=[1; Inf; Inf;  Inf;  .2];      % Taking min c_i as 1: 1 - 4 sd cannot be negative, thus max sigma is .25 or .2
    tic
    %[parameter,fval,exitflag,output]=knitromatlab(@(parameter) likelihood_obj(parameter,ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed,LowCost_Error,HighCost_Error),parameter_ini,A,b,[],[],lb,ub,[],[],options,[])
    [parameter,fval,exitflag,output]=knitromatlab(@(parameter) likelihood_obj(parameter,ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed,LowCost_Error,HighCost_Error),parameter_ini,[],[],[],[],lb,ub,[],[],options,[])
    %[parameter,fval,exitflag,output]=knitromatlab(@(parameter) likelihood_obj(parameter,ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed,LowCost_Error,HighCost_Error),parameter_ini,[],[],[],[],[],[],[],[],options,[])
    toc

    if ability_metric==4                                            % 4. Ability proxy is WTA z-score (normalized ranking points)
        parameter_ini=[parameter(1); parameter(2); .1; 1.1; parameter(5)];        % One more optimization, using estimates of environmental parameters based on betting odds as initial values
        [parameter,fval,exitflag,output]=knitromatlab(@(parameter) likelihood_obj(parameter,ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed,LowCost_Error,HighCost_Error),parameter_ini,[],[],[],[],lb,ub,[],[],options,[])
        % Confirm with fminsearch
        options=optimset('display','iter','maxiter',100000,'maxfunevals',100000,'TolX',1e-15,'TolFun',1e-10);   % tolerance tighter than used in ES (with fminsearch)  
        [parameter,fval,exitflag,output]=fminsearch(@(parameter) likelihood_obj(parameter,ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed,LowCost_Error,HighCost_Error), parameter_ini, options)
    end

    sim_parameter(:,s)=parameter;
    sim_fval(s)       =fval;
    sim_exitflag(s)   =exitflag;

end    
    

